#' Scatter plot of Cibersort predicted fractions against ground truth.
#'
#' Runs `cor.test()` on `predicted_fraction` vs `mean_fraction`, then prints
#' a ggplot scatter plot with a linear fit, the default `geom_abline()`
#' reference line and error bars of +/- `sd_fraction`. The correlation
#' estimate and p-value are embedded in the plot title.
#'
#' @param type Label placed at the start of the plot title.
#' @param plot_df Data frame with columns `predicted_fraction`,
#'   `mean_fraction`, `sd_fraction`, `sample` and `cell_type`.
#' @return The value of `print()` on the plot; called for its side effect.
create_cs_scatter_plot <- function(type, plot_df) {
  cor_result <- cor.test(
    plot_df$predicted_fraction,
    plot_df$mean_fraction
  )
  # signif() instead of round(): rounding to 4 decimal places rendered
  # every p-value below 5e-5 as "P=0".
  p_value <- signif(cor_result$p.value, 4)
  r_value <- round(cor_result$estimate, 4)
  plot_title <- str_c(
    type,
    ", ground truth vs Cibersort predictions, R=",
    r_value,
    " P=",
    p_value
  )
  # Error-bar cap width is scaled to the spread of the x values.
  errorbar_width <- sd(plot_df$predicted_fraction) / 8
  scatter_plot <- plot_df %>%
    ggplot(aes(x = predicted_fraction, y = mean_fraction)) +
    geom_point(size = 4, aes(color = sample, shape = cell_type)) +
    geom_smooth(method = "lm") +
    geom_abline() +
    geom_errorbar(
      aes(
        ymin = mean_fraction - sd_fraction,
        ymax = mean_fraction + sd_fraction
      ),
      width = errorbar_width
    ) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, size = 12)) +
    theme(axis.text.y = element_text(size = 12)) +
    theme(strip.text.y = element_text(size = 10, angle = 0)) +
    ggtitle(plot_title) +
    ylab("Ground truth fraction") +
    xlab("Cibersort predicted fraction")
  print(scatter_plot)
}
#' Plot Cibersort predictions against ground truth, overall and per cell type.
#'
#' Builds a long table of predicted fractions and a table of per
#' sample/cell-type mean and sd of the ground-truth fractions, joins them,
#' and calls `create_cs_scatter_plot()` once for all rows ("All_cells") and
#' once per `cell_type`.
#'
#' @param config List; this function reads `synapse_ids$cibersort_results`,
#'   `synapse_ids$ground_truth`, `cs_gt_groups`, `gt_cs_groups` and
#'   `cibersort_common_groups`.
#' @return The value of `walk2()`; called for its plotting side effects.
make_cibersort_vs_ground_truth_plots <- function(config) {
  results_df <- config$synapse_ids$cibersort_results %>%
    dowload_and_format_cibersort_df() %>%
    group_cell_types(config$cs_gt_groups) %>%
    select(c("sample", config$cibersort_common_groups)) %>%
    gather("cell_type", "predicted_fraction", -"sample")
  ground_truth_df <- config$synapse_ids$ground_truth %>%
    create_df_from_synapse_id() %>%
    # Normalise column names: dots and whitespace become underscores.
    set_colnames(str_replace_all(colnames(.), "\\.", "_")) %>%
    set_colnames(str_replace_all(colnames(.), "[:space:]", "_")) %>%
    group_cell_types(config$gt_cs_groups) %>%
    select(c("sample", config$cibersort_common_groups)) %>%
    gather("cell_type", "fraction", -sample) %>%
    # Ground-truth values are divided by 100 before comparison.
    mutate(fraction = fraction / 100) %>%
    .[complete.cases(.), ] %>%
    group_by(sample, cell_type) %>%
    dplyr::summarise(
      sd_fraction = sd(fraction),
      mean_fraction = mean(fraction)
    )
  # Explicit keys: these are the two columns the tables share, and naming
  # them avoids the "Joining, by = ..." message and accidental extra keys.
  plot_df <- inner_join(
    results_df,
    ground_truth_df,
    by = c("sample", "cell_type")
  )
  create_cs_scatter_plot("All_cells", plot_df)
  plot_dfs <- split(plot_df, plot_df$cell_type)
  # Take the labels from the split list itself so each label is guaranteed
  # to belong to the data frame it is paired with.
  walk2(names(plot_dfs), plot_dfs, create_cs_scatter_plot)
}
# Draw the Cibersort vs ground-truth scatter plots; `config` is not defined
# in this part of the file and must already exist in the calling environment.
make_cibersort_vs_ground_truth_plots(config)
## Joining, by = c("sample", "cell_type")




#' Scatter plot of MCPcounter scores against ground-truth fractions.
#'
#' Runs `cor.test()` on `score` vs `mean_fraction`, then prints a ggplot
#' scatter plot with a linear fit and error bars of +/- `sd_fraction`. The
#' correlation estimate and p-value are embedded in the plot title.
#'
#' @param type Label placed at the start of the plot title.
#' @param plot_df Data frame with columns `score`, `mean_fraction`,
#'   `sd_fraction`, `sample` and `cell_type`.
#' @return The value of `print()` on the plot; called for its side effect.
create_mcp_scatter_plot <- function(type, plot_df) {
  cor_result <- cor.test(
    plot_df$score,
    plot_df$mean_fraction
  )
  # signif() instead of round(): rounding to 4 decimal places rendered
  # every p-value below 5e-5 as "P=0".
  p_value <- signif(cor_result$p.value, 4)
  r_value <- round(cor_result$estimate, 4)
  plot_title <- str_c(
    type,
    ", ground truth vs MCPcounter scores, R=",
    r_value,
    " P=",
    p_value
  )
  # Error-bar cap width is scaled to the spread of the x values.
  errorbar_width <- sd(plot_df$score) / 8
  # NOTE(review): unlike the other plots in this file, no theme_bw() is
  # applied here -- confirm whether that is intentional.
  scatter_plot <- plot_df %>%
    ggplot(aes(x = score, y = mean_fraction)) +
    geom_point(size = 4, aes(color = sample, shape = cell_type)) +
    geom_smooth(method = "lm") +
    geom_errorbar(
      aes(
        ymin = mean_fraction - sd_fraction,
        ymax = mean_fraction + sd_fraction
      ),
      width = errorbar_width
    ) +
    theme(axis.text.x = element_text(angle = 90, size = 12)) +
    theme(axis.text.y = element_text(size = 12)) +
    theme(strip.text.y = element_text(size = 10, angle = 0)) +
    ggtitle(plot_title) +
    ylab("Ground truth fraction") +
    xlab("MCPcounter score")
  print(scatter_plot)
}
#' Plot MCPcounter scores against ground truth, one plot per cell type.
#'
#' Builds a long table of MCPcounter scores and a table of per
#' sample/cell-type mean and sd of the ground-truth fractions, joins them,
#' and calls `create_mcp_scatter_plot()` once per `cell_type`.
#'
#' @param config List; this function reads `synapse_ids$mcpcounter_results`,
#'   `synapse_ids$ground_truth`, `mcp_gt_groups`, `gt_mcp_groups` and
#'   `mcpcounter_common_groups`.
#' @return The value of `walk2()`; called for its plotting side effects.
make_mcpcounter_vs_ground_truth_plots <- function(config) {
  results_df <- config$synapse_ids$mcpcounter_results %>%
    dowload_and_format_mcpcounter_df() %>%
    group_cell_types(config$mcp_gt_groups) %>%
    select(c("sample", config$mcpcounter_common_groups)) %>%
    gather("cell_type", "score", -"sample")
  ground_truth_df <- config$synapse_ids$ground_truth %>%
    create_df_from_synapse_id() %>%
    # Normalise column names: dots and whitespace become underscores.
    set_colnames(str_replace_all(colnames(.), "\\.", "_")) %>%
    set_colnames(str_replace_all(colnames(.), "[:space:]", "_")) %>%
    group_cell_types(config$gt_mcp_groups) %>%
    select(c("sample", config$mcpcounter_common_groups)) %>%
    gather("cell_type", "fraction", -sample) %>%
    # Ground-truth values are divided by 100 before comparison.
    mutate(fraction = fraction / 100) %>%
    .[complete.cases(.), ] %>%
    group_by(sample, cell_type) %>%
    dplyr::summarise(
      sd_fraction = sd(fraction),
      mean_fraction = mean(fraction)
    )
  # Explicit keys: these are the two columns the tables share, and naming
  # them avoids the "Joining, by = ..." message and accidental extra keys.
  plot_df <- inner_join(
    results_df,
    ground_truth_df,
    by = c("sample", "cell_type")
  )
  plot_dfs <- split(plot_df, plot_df$cell_type)
  # Take the labels from the split list itself so each label is guaranteed
  # to belong to the data frame it is paired with.
  walk2(names(plot_dfs), plot_dfs, create_mcp_scatter_plot)
}
# Draw the MCPcounter vs ground-truth scatter plots; `config` is not defined
# in this part of the file and must already exist in the calling environment.
make_mcpcounter_vs_ground_truth_plots(config)
## Joining, by = c("sample", "cell_type")

#' Heatmap of z-scored expression for the Cibersort gene set.
#'
#' Loads the sample annotations (used as column annotations), the gene
#' table filtered to `Method == "cibersort"`, and the expression table.
#' The expression matrix is filtered, quantile-normalised and z-scored by
#' the project helpers, restricted to the Cibersort genes, and drawn with
#' `pheatmap()`.
#'
#' @param config List; this function reads `synapse_ids$annotations`,
#'   `synapse_ids$cibersort_genes` and `synapse_ids$log_tpm_expression`.
#' @return The value returned by `pheatmap()`.
create_cibersort_gene_heatmaps <- function(config) {
  heatmap_col_df <- config$synapse_ids$annotations %>%
    create_df_from_synapse_id() %>%
    arrange(sample) %>%
    data.frame() %>%
    column_to_rownames("sample")
  genes <- config$synapse_ids$cibersort_genes %>%
    create_df_from_synapse_id() %>%
    filter(Method == "cibersort") %>%
    use_series("Hugo") %>%
    unique() %>%
    sort()
  # drop = FALSE keeps the two-dimensional shape even when a filter leaves
  # a single row (or the input has a single column); without it the
  # result silently collapses to a plain vector.
  zscore_m <- config$synapse_ids$log_tpm_expression %>%
    create_df_from_synapse_id() %>%
    df_to_matrix("Hugo") %>%
    .[rowSums(.) > 0, , drop = FALSE] %>%
    quantile_normalize_matrix() %>%
    zscore_matrix() %>%
    .[rownames(.) %in% genes, , drop = FALSE] %>%
    .[complete.cases(.), , drop = FALSE]
  pheatmap(
    zscore_m,
    main = "Cibersort genes",
    annotation_col = heatmap_col_df,
    scale = "none",
    fontsize = 15,
    fontsize_row = 5
  )
}
# Draw the Cibersort gene heatmap; `config` is not defined in this part of
# the file and must already exist in the calling environment.
create_cibersort_gene_heatmaps(config)

#' Heatmaps of z-scored expression for the MCPcounter gene set.
#'
#' Draws two `pheatmap()` heatmaps of the same matrix: the first keeps the
#' rows in cell-type order (`cluster_rows = FALSE`), the second uses
#' pheatmap's default row clustering.
#'
#' @param annotations Passed to `create_df_from_synapse_id()`; the result
#'   provides the column annotations and must contain a `sample` column.
#' @param mcpcounter_genes Passed to `create_df_from_synapse_id()`; the
#'   result is filtered to `Method == "mcpcounter"` and must contain `Hugo`
#'   and `cell_type` columns.
#' @param log_tpm_expression Passed to `create_df_from_synapse_id()`; the
#'   result is converted to a matrix keyed by `Hugo`.
#' @return The value returned by the second `pheatmap()` call.
create_mcpcounter_gene_heatmaps <- function(
    annotations, mcpcounter_genes, log_tpm_expression) {
  mcp_heatmap_col_df <- annotations %>%
    create_df_from_synapse_id() %>%
    arrange(sample) %>%
    data.frame() %>%
    column_to_rownames("sample")
  mcp_gene_df <- mcpcounter_genes %>%
    create_df_from_synapse_id() %>%
    filter(Method == "mcpcounter")
  mcp_genes <- mcp_gene_df %>%
    use_series("Hugo") %>%
    unique() %>%
    sort()
  # drop = FALSE keeps the two-dimensional shape even when a filter leaves
  # a single row (or the input has a single column).
  mcp_zscore_matrix <- log_tpm_expression %>%
    create_df_from_synapse_id() %>%
    df_to_matrix("Hugo") %>%
    .[rowSums(.) > 0, , drop = FALSE] %>%
    quantile_normalize_matrix() %>%
    zscore_matrix() %>%
    .[rownames(.) %in% mcp_genes, , drop = FALSE] %>%
    .[complete.cases(.), , drop = FALSE]
  # mcp_gene_df is already restricted to Method == "mcpcounter" above, so
  # the filter is not repeated here.
  mcp_heatmap_row_df <- mcp_gene_df %>%
    filter(Hugo %in% rownames(mcp_zscore_matrix)) %>%
    select(-Method) %>%
    arrange(cell_type) %>%
    data.frame() %>%
    column_to_rownames("Hugo") %>%
    set_names("Cell Type")
  # Reorder the matrix rows to follow the cell-type-sorted annotation.
  mcp_zscore_matrix <-
    mcp_zscore_matrix[rownames(mcp_heatmap_row_df), , drop = FALSE]
  pheatmap(
    mcp_zscore_matrix,
    main = "MCPCounter genes",
    annotation_row = mcp_heatmap_row_df,
    annotation_col = mcp_heatmap_col_df,
    cluster_rows = FALSE,
    scale = "none"
  )
  pheatmap(
    mcp_zscore_matrix,
    main = "MCPCounter genes",
    annotation_row = mcp_heatmap_row_df,
    annotation_col = mcp_heatmap_col_df,
    scale = "none"
  )
}
# Draw the MCPcounter gene heatmaps from the ids stored under
# `config$synapse_ids`.
create_mcpcounter_gene_heatmaps(
  annotations = config$synapse_ids$annotations,
  mcpcounter_genes = config$synapse_ids$mcpcounter_genes,
  log_tpm_expression = config$synapse_ids$log_tpm_expression
)


#' Point plot of Cibersort predicted fractions per Cibersort cell type.
#'
#' Reshapes the Cibersort results to long form, joins the sample
#' annotations by `sample`, and prints a point plot. When the joined table
#' has a `cell_type` column the plot is faceted by it.
#'
#' @param annotations Passed to `create_df_from_synapse_id()`; the result
#'   must contain a `sample` column.
#' @param cibersort_results Passed to `dowload_and_format_cibersort_df()`.
#' @return The value of `print()` on the plot; called for its side effect.
create_cibersort_scatterplots <- function(annotations, cibersort_results) {
  anno_df <- annotations %>%
    create_df_from_synapse_id() %>%
    arrange(sample)
  cs_result_df <- cibersort_results %>%
    dowload_and_format_cibersort_df() %>%
    gather("cibersort_cell_type", "predicted_fraction", -sample) %>%
    inner_join(anno_df, by = c("sample"))
  cs_plot <- ggplot(
    cs_result_df,
    aes(x = cibersort_cell_type, y = predicted_fraction)
  ) +
    geom_point() +
    ylab("Predicted fraction") +
    xlab("Cibersort cell type") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, size = 12)) +
    theme(axis.text.y = element_text(size = 12)) +
    theme(strip.text.y = element_text(size = 10, angle = 0)) +
    ggtitle("Cibersort Results")
  # Test the column names instead of `$`-extracting a possibly missing
  # column: that extraction raised "Unknown or uninitialised column", and
  # on a plain data.frame `$` would also partial-match other names.
  if ("cell_type" %in% colnames(cs_result_df)) {
    cs_plot <- cs_plot + facet_grid(cell_type ~ .)
  }
  print(cs_plot)
}
# Draw the Cibersort results plot from the ids stored under
# `config$synapse_ids`.
create_cibersort_scatterplots(
  annotations = config$synapse_ids$annotations,
  cibersort_results = config$synapse_ids$cibersort_results
)
## Warning: Unknown or uninitialised column: 'cell_type'.

#' Point plot of MCPcounter scores per MCPcounter cell type.
#'
#' Reshapes the MCPcounter results to long form, joins the sample
#' annotations by `sample`, and prints a point plot. When the joined table
#' has a `cell_type` column the plot is faceted by it.
#'
#' @param annotations Passed to `create_df_from_synapse_id()`; the result
#'   must contain a `sample` column.
#' @param mcpcounter_results Passed to `dowload_and_format_mcpcounter_df()`.
#' @return The value of `print()` on the plot; called for its side effect.
create_mcpcounter_scatterplots <- function(annotations, mcpcounter_results) {
  anno_df <- annotations %>%
    create_df_from_synapse_id() %>%
    arrange(sample)
  mcp_result_df <- mcpcounter_results %>%
    dowload_and_format_mcpcounter_df() %>%
    gather("mcpcounter_cell_type", "predicted_score", -sample) %>%
    inner_join(anno_df, by = c("sample"))
  mcp_plot <- ggplot(
    mcp_result_df,
    aes(x = mcpcounter_cell_type, y = predicted_score)
  ) +
    geom_point() +
    ylab("Predicted score") +
    xlab("MCPCounter cell type") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, size = 12)) +
    theme(axis.text.y = element_text(size = 12)) +
    theme(strip.text.y = element_text(size = 10, angle = 0)) +
    ggtitle("MCPCounter results")
  # Test the column names instead of `$`-extracting a possibly missing
  # column: that extraction raised "Unknown or uninitialised column", and
  # on a plain data.frame `$` would also partial-match other names.
  if ("cell_type" %in% colnames(mcp_result_df)) {
    mcp_plot <- mcp_plot + facet_grid(cell_type ~ .)
  }
  print(mcp_plot)
}
# Draw the MCPcounter results plot from the ids stored under
# `config$synapse_ids`.
create_mcpcounter_scatterplots(
  annotations = config$synapse_ids$annotations,
  mcpcounter_results = config$synapse_ids$mcpcounter_results
)
## Warning: Unknown or uninitialised column: 'cell_type'.

#' Print a PCA plot ("PC 1 vs 2") of the expression matrix.
#'
#' Loads the sample annotations and the expression table, drops rows whose
#' sum is not positive, transposes the matrix, runs `prcomp()` on it and
#' prints the `autoplot()` result. Shape, colour and size come from
#' `config$pca_plot_aes`; size falls back to 4 when it is not set.
#'
#' @param config List; this function reads `synapse_ids$annotations`,
#'   `synapse_ids$log_tpm_expression` and `pca_plot_aes` (`shape`, `size`,
#'   `color`).
#' @return The value of `print()` on the plot; called for its side effect.
create_pca_plot <- function(config) {
  sample_annotations <- config$synapse_ids$annotations %>%
    create_df_from_synapse_id() %>%
    arrange(sample)
  # NOTE(review): the annotation table is sorted by `sample`, but nothing
  # here checks that the rows of the transposed matrix are in the same
  # order -- confirm the two line up before trusting the shapes/colours.
  transposed_expression <- config$synapse_ids$log_tpm_expression %>%
    create_df_from_synapse_id() %>%
    df_to_matrix("Hugo") %>%
    .[rowSums(.) > 0, ] %>%
    t()
  point_size <- config$pca_plot_aes$size
  if (is.null(point_size)) {
    point_size <- 4
  }
  pca_plot <- autoplot(
    prcomp(transposed_expression),
    data = sample_annotations,
    shape = config$pca_plot_aes$shape,
    size = point_size,
    colour = config$pca_plot_aes$color,
    main = "PC 1 vs 2"
  ) +
    scale_shape_manual(values = 1:19) +
    theme_bw()
  print(pca_plot)
}
# Draw the PCA plot; `config` is not defined in this part of the file and
# must already exist in the calling environment.
create_pca_plot(config)

#' Point plot of ssGSEA enrichment scores for the Cibersort gene sets.
#'
#' Builds one gene set per `cell_type` from the gene table (filtered to
#' `Method == "cibersort"`), scores the expression matrix with
#' `gsva(method = "ssgsea")`, reshapes the scores to long form, joins the
#' sample annotations and prints a point plot. When the annotations have a
#' `cell_type` column the plot is faceted by it.
#'
#' @param config List; this function reads `synapse_ids$annotations`,
#'   `synapse_ids$cibersort_genes` and `synapse_ids$log_tpm_expression`.
#' @return The value of `print()` on the plot; called for its side effect.
create_cibersort_gsea_plot <- function(config) {
  anno_df <- config$synapse_ids$annotations %>%
    create_df_from_synapse_id() %>%
    arrange(sample)
  cs_genes <- config$synapse_ids$cibersort_genes %>%
    create_df_from_synapse_id() %>%
    filter(Method == "cibersort") %>%
    split(.$cell_type) %>%
    map(use_series, Hugo)
  cs_ssgsea_df <- config$synapse_ids$log_tpm_expression %>%
    create_df_from_synapse_id() %>%
    df_to_matrix("Hugo") %>%
    .[rowSums(.) > 0, ] %>%
    gsva(cs_genes, method = "ssgsea", verbose = FALSE) %>%
    matrix_to_df("CS_cell_type") %>%
    gather(key = "sample", value = "enrichment", -CS_cell_type) %>%
    # Explicit key: `sample` is the shared column, and naming it avoids
    # the "Joining, by = ..." message and accidental extra keys.
    left_join(anno_df, by = "sample")
  gsea_plot <- ggplot(
    cs_ssgsea_df,
    aes(x = CS_cell_type, y = enrichment)
  ) +
    geom_point() +
    ylab("GSEA enrichment score") +
    xlab("Cibersort cell type") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, size = 12)) +
    theme(axis.text.y = element_text(size = 12)) +
    theme(strip.text.y = element_text(size = 10, angle = 0)) +
    ggtitle("GSEA with Cibersort genes")
  # Test the column names instead of `$`-extracting a possibly missing
  # column, which raised "Unknown or uninitialised column".
  if ("cell_type" %in% colnames(anno_df)) {
    gsea_plot <- gsea_plot + facet_grid(cell_type ~ .)
  }
  print(gsea_plot)
}
# Draw the Cibersort ssGSEA plot; `config` is not defined in this part of
# the file and must already exist in the calling environment.
create_cibersort_gsea_plot(config)
## Joining, by = "sample"
## Warning: Unknown or uninitialised column: 'cell_type'.

#' Point plot of ssGSEA enrichment scores for the MCPcounter gene sets.
#'
#' Builds one gene set per `cell_type` from the gene table (filtered to
#' `Method == "mcpcounter"`), scores the expression matrix with
#' `gsva(method = "ssgsea")`, reshapes the scores to long form, joins the
#' sample annotations and prints a point plot. When the annotations have a
#' `cell_type` column the plot is faceted by it.
#'
#' @param config List; this function reads `synapse_ids$annotations`,
#'   `synapse_ids$mcpcounter_genes` and `synapse_ids$log_tpm_expression`.
#' @return The value of `print()` on the plot; called for its side effect.
create_mcpcounter_gsea_plot <- function(config) {
  anno_df <- config$synapse_ids$annotations %>%
    create_df_from_synapse_id() %>%
    arrange(sample)
  mcp_genes <- config$synapse_ids$mcpcounter_genes %>%
    create_df_from_synapse_id() %>%
    filter(Method == "mcpcounter") %>%
    split(.$cell_type) %>%
    map(use_series, Hugo)
  mcp_ssgsea_df <- config$synapse_ids$log_tpm_expression %>%
    create_df_from_synapse_id() %>%
    df_to_matrix("Hugo") %>%
    .[rowSums(.) > 0, ] %>%
    gsva(mcp_genes, method = "ssgsea", verbose = FALSE) %>%
    matrix_to_df("MCP_cell_type") %>%
    gather(key = "sample", value = "enrichment", -MCP_cell_type) %>%
    # Explicit key: `sample` is the shared column, and naming it avoids
    # the "Joining, by = ..." message and accidental extra keys.
    left_join(anno_df, by = "sample")
  gsea_plot <- ggplot(
    mcp_ssgsea_df,
    aes(x = MCP_cell_type, y = enrichment)
  ) +
    geom_point() +
    ylab("GSEA enrichment score") +
    xlab("MCPcounter cell type") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, size = 12)) +
    theme(axis.text.y = element_text(size = 12)) +
    theme(strip.text.y = element_text(size = 10, angle = 0)) +
    ggtitle("GSEA with MCPcounter genes")
  # Test the column names instead of `$`-extracting a possibly missing
  # column, which raised "Unknown or uninitialised column".
  if ("cell_type" %in% colnames(anno_df)) {
    gsea_plot <- gsea_plot + facet_grid(cell_type ~ .)
  }
  print(gsea_plot)
}
create_mcpcounter_gsea_plot(config)
## Joining, by = "sample"
## Warning: Unknown or uninitialised column: 'cell_type'.
